#include "config.h"
#include "iser.h"
#include "iser_rdma.h"
#include "dbg.h"
#include "core.h"
#include "net_vip.h"
#include "etcd.h"

/*
 * Number of tasks carved out of the login-phase pools.  In
 * iser_conn_alloc_login_resources() task [0] is posted as the receive
 * task and task [1] is kept as conn->login_tx_task.
 */
#define NUM_LOGIN_TASKS         2 /* one posted for req rx, one for reply tx */

/*
 * All iser connections: entries are added in iser_conn_create() and
 * removed in iser_conn_free() via conn->conn_list.
 */
static LIST_HEAD(iser_conn_list);

/*
 * Allocate and initialise a new iser connection.
 *
 * The structure is zeroed, its iscsi header part is put into STATE_INIT
 * with default session parameters, all per-connection work lists are
 * initialised empty, and the connection is linked into iser_conn_list.
 *
 * @_conn: out parameter; receives the new connection on success.
 * Returns 0 on success, or the non-zero value returned by ymalloc().
 */
int iser_conn_create(struct iser_conn **_conn)
{
        struct iser_conn *fresh;
        int ret;

        ret = ymalloc((void **)&fresh, sizeof(*fresh));
        if (ret) {
                GOTO(err_ret, ret);
        }

        memset(fresh, 0, sizeof(*fresh));

        /* generic iscsi connection header */
        fresh->h.state = STATE_INIT;
        fresh->h.rdma = 1;
        fresh->h.closed = 0;
        fresh->h.refcount = 0;
        param_set_defaults(fresh->h.session_param, session_keys);
        INIT_LIST_HEAD(&fresh->h.entry);

        /* per-connection work queues, all start out empty */
        INIT_LIST_HEAD(&fresh->buf_alloc_list);
        INIT_LIST_HEAD(&fresh->rdma_rd_list);
        INIT_LIST_HEAD(&fresh->iosubmit_list);
        INIT_LIST_HEAD(&fresh->resp_tx_list);
        INIT_LIST_HEAD(&fresh->sent_list);
        INIT_LIST_HEAD(&fresh->post_recv_list);

        DINFO("alloc conn:%p\n", fresh);

        /* make the connection visible in the global list */
        list_add(&fresh->conn_list, &iser_conn_list);

        *_conn = fresh;

        return 0;
err_ret:
        return ret;
}

/*
 * Map a login phase to a printable name for log messages.
 * Unknown values map to "Illegal".
 */
static char *iser_conn_login_phase_name(enum iser_login_phase phase)
{
        char *label = "Illegal";

        switch (phase) {
        case LOGIN_PHASE_INIT:
                label = "INIT";
                break;
        case LOGIN_PHASE_START:
                label = "START";
                break;
        case LOGIN_PHASE_LAST_SEND:
                label = "LAST_SEND";
                break;
        case LOGIN_PHASE_FF:
                label = "FF";
                break;
        default:
                break;
        }

        return label;
}

/*
 * Move the connection to a new login phase, logging the transition
 * (old phase name -> new phase name) before the field is updated.
 */
void iser_conn_login_phase_set(struct iser_conn *conn,
                                      enum iser_login_phase phase)
{
        char *old_name = iser_conn_login_phase_name(conn->login_phase);
        char *new_name = iser_conn_login_phase_name(phase);

        DINFO("conn:%p from:%s to:%s\n", conn, old_name, new_name);

        conn->login_phase = phase;
}

/*
 * Take one reference on the connection.
 * Always returns 0; the connection state is not checked (see TODO).
 */
inline int iser_conn_get(struct iser_conn *conn)
{
        /* TODO: check state */
        conn->h.refcount += 1;

        DBUG("refcnt:%d\n", conn->h.refcount);

        return 0;
}

/*
 * Take @n references on the connection in one step.
 * Always returns 0.
 */
inline int iser_conn_getn(struct iser_conn *conn, int n)
{
        int bumped;

        bumped = conn->h.refcount + n;
        conn->h.refcount = bumped;

        return 0;
}

/*
 * Drop one reference on the connection; a NULL @conn is ignored.
 *
 * If the count is already zero only a warning is logged.  Whenever the
 * count is zero after that step, the connection must be in a closing
 * state (asserted) and the conn-free event is scheduled.
 *
 * NOTE(review): a put on a connection whose count is already zero
 * schedules the free event again; the original author flagged this
 * mechanism as problematic.
 */
inline void iser_conn_put(struct iser_conn *conn)
{
        if (unlikely(!conn))
                return;

        if (conn->h.refcount <= 0) {
                DWARN("refcount already be zero.\r\n");
        } else {
                conn->h.refcount--;
        }

        if (likely(conn->h.refcount != 0))
                return;

        /* last reference gone: only legal while the connection is closing */
        YASSERT(conn->h.state == STATE_CLOSE || conn->h.state == STATE_CLOSED
                        || conn->h.state == STATE_EXIT);

        iser_sched_add_event(&conn->sched_conn_free);
}

/*
 * On connection shutdown: release the full-feature phase resources
 * (PDU data MR, PDU data pool, task pool) allocated by
 * iser_conn_alloc_ff_resources().
 *
 * Does nothing unless conn->ff_res_alloc is set, and clears that flag
 * when done.  Every released pointer is reset to NULL so that no stale
 * reference to freed memory stays behind in the connection.
 */
static void iser_free_ff_resources(struct iser_conn *conn)
{
        int ret;

        if (!conn->ff_res_alloc)
                return;

        DINFO("conn:%p pdu_mr:%p pdu_pool:%p task_pool:%p\n",
                conn, conn->pdu_data_mr,
                conn->pdu_data_pool, conn->task_pool);

        /* release mr and free the lists */
        if (conn->pdu_data_mr) {
                if (!conn->private_mem) {
                        ret = iser_dev_deregmr(conn->pdu_data_mr);
                        if (ret)
                                DERROR("conn:%p ibv_dereg_mr failed, %m\n", conn);
                }
                /*
                 * With private memory the MR is the device-wide one
                 * (conn->dev->ff_resources_mr).  Per the original note,
                 * iser_dev_create registers the whole polling-core memory
                 * once, so it is never deregistered per connection.
                 */
                conn->pdu_data_mr = NULL;
        }

        if (conn->pdu_data_pool) {
                if (conn->private_mem)
                        mempages_free(conn->private_mem, conn->pdu_data_pool);
                else
                        free(conn->pdu_data_pool);

                conn->pdu_data_pool = NULL; //possible problem.
        }

        /*
         * With private memory the task pool lives inside the PDU data pool
         * released above; otherwise it is a separate heap allocation.
         * Either way the pointer is stale from here on.
         */
        if (!conn->private_mem)
                free(conn->task_pool);
        conn->task_pool = NULL;

        conn->nop_in_task = NULL;
        conn->text_tx_task = NULL;

        conn->ff_res_alloc = 0;
}

/*
 * When ready for full-feature mode, free login-phase resources.
 *
 * Deregisters the login data MR and frees the login data and task pools
 * allocated by iser_conn_alloc_login_resources().  Does nothing unless
 * conn->login_res_alloc is set, and clears that flag when done.
 *
 * All released pointers are reset to NULL, matching what the error
 * paths of iser_conn_alloc_login_resources() already do, so no stale
 * pointer to freed memory is left in the connection.
 */
void iser_conn_free_login_resources(struct iser_conn *conn)
{
        int ret;

        if (!conn->login_res_alloc)
                return;

        DINFO("conn:%p, login phase:%s\n", conn,
                iser_conn_login_phase_name(conn->login_phase));

        /* release mr and free the lists */
        if (conn->login_data_mr) {
                ret = iser_dev_deregmr(conn->login_data_mr);
                if (ret)
                        DERROR("conn:%p ibv_dereg_mr failed, %m\n", conn);
                conn->login_data_mr = NULL;
        }

        /* free(NULL) is a no-op, so no guards are needed */
        free(conn->login_data_pool);
        conn->login_data_pool = NULL;

        free(conn->login_task_pool);
        conn->login_task_pool = NULL;

        /* the tx task lived inside login_task_pool */
        conn->login_tx_task = NULL;

        conn->login_res_alloc = 0;
}

/*
 * Allocate the login-phase resources of a connection.
 *
 * Two pools are created: a zeroed, MR-registered data pool holding
 * NUM_LOGIN_TASKS buffers of ALIGN_TO_32(conn->rsize) bytes each, and a
 * zeroed pool of NUM_LOGIN_TASKS iser_task structures.  Task 0 is posted
 * as the login receive task; task 1 is saved as conn->login_tx_task.
 *
 * Returns 0 on success.  On failure everything allocated so far is
 * released and the failing call's non-zero return value is passed on.
 */
int iser_conn_alloc_login_resources(struct iser_conn *conn)
{
        int ret;
        unsigned long slot_sz = ALIGN_TO_32(conn->rsize);
        unsigned long data_sz = NUM_LOGIN_TASKS * slot_sz;
        unsigned long tasks_sz = NUM_LOGIN_TASKS * sizeof(struct iser_task);
        struct iser_task *tasks[NUM_LOGIN_TASKS];
        uint8_t *data_base, *task_base;
        unsigned int idx;

        DINFO("conn:%p login tasks num:%u, buf_sz:%lu (rx_sz:%u tx_sz:%u)\n",
                conn, NUM_LOGIN_TASKS, slot_sz, conn->rsize, conn->ssize);

        /* step 1: data pool */
        ret = ymalloc((void **)&conn->login_data_pool, data_sz);
        if (ret) {
                DERROR("conn:%p malloc login_data_pool sz:%lu failed\n",
                        conn, data_sz);
                GOTO(err_ret, ret);
        }

        memset(conn->login_data_pool, 0, data_sz);

        /* step 2: register the data pool with the device */
        ret = iser_dev_regmr(&conn->login_data_mr, conn->dev->pd,
                                conn->login_data_pool, data_sz);
        if (ret) {
                DERROR("conn:%p ibv_reg_mr login pool failed, %m\n", conn);
                free(conn->login_data_pool);
                conn->login_data_pool = NULL;

                GOTO(err_ret, ret);
        }

        /* step 3: task pool */
        ret = ymalloc((void **)&conn->login_task_pool, tasks_sz);
        if (ret) {
                DERROR("conn:%p malloc login_task_pool sz:%lu failed\n",
                        conn, tasks_sz);
                iser_dev_deregmr(conn->login_data_mr);
                conn->login_data_mr = NULL;
                free(conn->login_data_pool);
                conn->login_data_pool = NULL;

                GOTO(err_ret, ret);
        }
        memset(conn->login_task_pool, 0, tasks_sz);

        /* from here on iser_conn_free_login_resources() does the cleanup */
        conn->login_res_alloc = 1;

        YASSERT(conn->login_data_pool);
        data_base = conn->login_data_pool;
        task_base = conn->login_task_pool;

        /* step 4: bind task idx to data slot idx */
        for (idx = 0; idx < NUM_LOGIN_TASKS; idx++) {
                tasks[idx] = (struct iser_task *)
                        (task_base + idx * sizeof(struct iser_task));

                iser_task_init(tasks[idx], conn,
                               data_base + idx * slot_sz, slot_sz,
                               conn->login_data_mr);
        }

        /* step 5: task 0 receives the login request */
        DINFO("post_recv login rx task:%p\n", tasks[0]);
        ret = iser_dev_post_recv(conn, tasks[0], 1);
        if (ret) {
                DERROR("conn:%p post_recv login rx-task failed\n", conn);
                iser_conn_free_login_resources(conn);
                GOTO(err_ret, ret);
        }

        /* step 6: task 1 is kept for sending the reply */
        DINFO("saved login tx-task:%p\n", tasks[1]);
        conn->login_tx_task = tasks[1];

        return 0;
err_ret:
        return ret;
}

static int iser_ib_clear_iosubmit_list(struct iser_conn *conn)
{
        struct iser_task *task;

        DINFO("start\n");
        while (!list_empty(&conn->iosubmit_list)) {
                task = list_entry(conn->iosubmit_list.next,
                                        struct iser_task, exec_list);
                list_del(&task->exec_list);
                iser_task_complete(task); /* must free, task keeps rdma buffer */
        }
        return 0;
}

static int iser_ib_clear_rdma_rd_list(struct iser_conn *conn)
{
        struct iser_task *task;

        DINFO("start\n");
        while (!list_empty(&conn->rdma_rd_list)) {
                task = list_entry(conn->rdma_rd_list.next,
                                        struct iser_task, rdma_list);
                list_del(&task->rdma_list);
                iser_task_complete(task);  /* must free, task keeps rdma buffer */
        }
        return 0;
}

int iser_ib_clear_tx_list(struct iser_conn *conn)
{
        struct iser_task *task;

        DINFO("start clear tx list\n");
        while (!list_empty(&conn->resp_tx_list)) {
                task = list_entry(conn->resp_tx_list.next,
                                        struct iser_task, tx_list);
                list_del(&task->tx_list);
                iser_task_complete(task);  /* must free, task keeps rdma buffer */
        }
        return 0;
}

static int iser_ib_clear_sent_list(struct iser_conn *conn)
{
        struct iser_task *task;

        DINFO("start\n");
        while (!list_empty(&conn->sent_list)) {
                task = list_entry(conn->sent_list.next,
                                        struct iser_task, tx_list);
                list_del(&task->tx_list); /* don't free, future completion guaranteed */
        }
        return 0;
}

static int iser_ib_clear_post_recv_list(struct iser_conn *conn)
{
        struct iser_task *task;

        DINFO("start\n");
        while (!list_empty(&conn->post_recv_list)) {
                task = list_entry(conn->post_recv_list.next,
                                        struct iser_task, recv_list);
                list_del(&task->recv_list); /* don't free, future completion guaranteed */
        }
        return 0;
}

/*
 * Called when the connection is freed, from iscsi, but won't do anything until
 * all posted WRs have gone away.  So also called again from RX progress when
 * it notices this happens.
 *
 * Tear-down order as implemented below:
 *   1. if references are still held, drop one and return without freeing;
 *   2. notify the target of the disconnect (when a session/target exists);
 *   3. drain all per-connection task lists;
 *   4. release login and full-feature resources;
 *   5. destroy the QP, then the RDMA CM id;
 *   6. unlink from the session list and from iser_conn_list;
 *   7. free the initiator name, the session (if now empty) and name strings;
 *   8. resume a task parked on conn->sched_task (if conn->yield is set);
 *   9. free the connection structure itself.
 *
 * @_conn: the struct iser_conn to free, passed as void *.
 */
void iser_conn_free(void *_conn)
{
        int ret;
        struct iser_conn *conn = _conn;
        struct iscsi_session *sess;

        DINFO("conn:%p refcnt:%d qp:%p cm_id:%p\n", conn, conn->h.refcount, conn->qp_hndl, conn->cm_id);

        /*
         * Still referenced: consume one reference and bail out.
         * NOTE(review): decrementing here (rather than only returning) means
         * every call to this function itself acts like a put -- confirm this
         * is intended.
         */
        if(conn->h.refcount > 0) {
                conn->h.refcount --;
                return;
        }

        YASSERT(conn->h.refcount == 0);

        /* tell the target layer which peer (ip, port) went away */
        if (conn->h.session && conn->h.session->target) {
                (void) target_disconnect(conn->h.session->target,
                                         _inet_ntop((struct sockaddr *)&conn->h.peer),
                                         ntohs(conn->h.peer.sin_port));
        }

        /* the first three complete their tasks, the last two only unlink */
        iser_ib_clear_iosubmit_list(conn);
        iser_ib_clear_rdma_rd_list(conn);
        iser_ib_clear_tx_list(conn);
        iser_ib_clear_sent_list(conn);
        iser_ib_clear_post_recv_list(conn);

        /* try to free unconditionally, resources freed only if necessary */
        iser_conn_free_login_resources(conn);
        iser_free_ff_resources(conn);

        /* the QP is destroyed through its cm_id, before the cm_id itself */
        if (conn->qp_hndl) {
                ret = iser_rdma_destroy_qp(conn->cm_id);
                if (ret)
                        DERROR("conn:%p ibv_destroy_qp failed, %m\n", conn);
        }
        if (conn->cm_id) {
                ret = iser_rdma_destroy_id(conn->cm_id);
                if (ret)
                        DERROR("conn:%p rdma_destroy_id failed, %m\n", conn);
        }

        /* delete from session; was put there by conn_add_to_session() */
        list_del(&conn->h.entry);
        /* and from the global iser_conn_list */
        list_del(&conn->conn_list);

        if (conn->h.initiator)
                free(conn->h.initiator);

        /* free the session once this was its last connection */
        sess = conn->h.session;
        if (sess && conn_empty(sess))
                session_free(sess);

        if (conn->peer_name)
                free(conn->peer_name);
        if (conn->self_name)
                free(conn->self_name);

        DINFO("conn:%p freed\n", conn);

        /* reset the state, then wake whoever is parked on sched_task */
        conn->h.state = STATE_INIT;
	if (conn->yield) {
		schedule_resume(&conn->sched_task, 0, NULL);
	}

        /* yfree takes the address of the local pointer */
        yfree((void **) &conn);
}

/*
 * Start closing connection. Transfer IB QP to error state.
 * This will be followed by WC error and buffers flush events.
 * We also should expect DISCONNECTED and TIMEWAIT_EXIT events.
 * Only after the draining is over we are sure to have reclaimed
 * all buffers (and tasks). After the RDMA CM events are collected,
 * the connection QP may be destroyed, and its number may be recycled.
 *
 * The function is a no-op when the connection is already in
 * STATE_CLOSED or has h.closed set.  Otherwise it disconnects the CM id,
 * completes the pending response tasks, removes the connection's
 * scheduler events, deregisters the periodic core check (if it was
 * registered), marks the connection closed and removes the peer IP from
 * the "client_iplist" key in etcd.
 */
void iser_conn_close(struct iser_conn *conn)
{
        int ret;
        char ip[MAX_NAME_LEN];

        if (conn->h.state == STATE_CLOSED || conn->h.closed)
                return;

        DINFO("rdma_disconnect conn:%p\n", conn);
        ret = iser_rdma_disconnect(conn->cm_id);
        if (ret)
                DERROR("conn:%p rdma_disconnect failed, %m\n", conn);

        iser_ib_clear_tx_list(conn);

        /* nothing more may be scheduled on behalf of this connection */
        iser_sched_remove_event(&conn->sched_buf_alloc);
        iser_sched_remove_event(&conn->sched_rdma_rd);
        iser_sched_remove_event(&conn->sched_iosubmit);
        iser_sched_remove_event(&conn->sched_tx);
        iser_sched_remove_event(&conn->sched_post_recv);

        /* core_reg is set when "iser_check" was registered for this conn */
        if (conn->core_reg)
                core_check_dereg("iser_check", (void *)conn);

        conn->h.closed = 1;
        conn->h.state = STATE_CLOSED;
        DERROR("conn:%p cm_id:0x%p state: CLOSE, refcnt:%d\n",
                conn, conn->cm_id, conn->h.refcount);

        /*
         * Bounded copy: inet_ntoa() returns a pointer to a static buffer,
         * and the destination is a fixed-size stack array, so never copy
         * more than it can hold.
         */
        snprintf(ip, sizeof(ip), "%s",
                 inet_ntoa(((struct sockaddr_in *)&conn->peer_addr)->sin_addr));
        ret = etcd_del("client_iplist", ip);
        if (unlikely(ret))
                DWARN("remove %s from etcd failed!!!!\n", ip);
}

/*
 * Format a one-line description of the connection ("RDMA IP Address:
 * <peer_name>") into @buf, writing at most @rest bytes.
 *
 * Returns the value of snprintf(), i.e. the length the full string
 * would have had, which can be >= @rest when the output was truncated.
 */
int iser_conn_show(struct iscsi_conn *iscsi_conn, char *buf,
                     int rest)
{
        struct iser_conn *conn = ISER_CONN(iscsi_conn);

        return snprintf(buf, rest, "RDMA IP Address: %s", conn->peer_name);
}

/*
 * Copy the local address of the connection into @sa.
 *
 * @len is in/out: on entry the capacity of @sa; if that exceeds
 * sizeof(conn->self_addr) it is clamped to that size.  Exactly *len
 * bytes are copied.  Always returns 0.
 */
int iser_conn_getsockname(struct iscsi_conn *iscsi_conn,
                            struct sockaddr *sa, socklen_t *len)
{
        struct iser_conn *conn = ISER_CONN(iscsi_conn);
        const size_t addr_sz = sizeof(conn->self_addr);

        if (*len > addr_sz)
                *len = addr_sz;

        memcpy(sa, &conn->self_addr, *len);

        return 0;
}

/*
 * Copy the peer address of the connection into @sa.
 *
 * @len is in/out: on entry the capacity of @sa; if that exceeds
 * sizeof(conn->peer_addr) it is clamped to that size.  Exactly *len
 * bytes are copied.  Always returns 0.
 */
int iser_conn_getpeername(struct iscsi_conn *iscsi_conn,
                            struct sockaddr *sa, socklen_t *len)
{
        struct iser_conn *conn = ISER_CONN(iscsi_conn);
        const size_t addr_sz = sizeof(conn->peer_addr);

        if (*len > addr_sz)
                *len = addr_sz;

        memcpy(sa, &conn->peer_addr, *len);

        return 0;
}

/*
 * Forcibly close a connection on behalf of the iscsi layer and drop one
 * reference on it.
 *
 * iser_conn_close() returns immediately when the state is already
 * STATE_CLOSED, so the state must NOT be set before calling it;
 * otherwise the RDMA disconnect and the scheduler/etcd cleanup are
 * skipped.  The state is forced to STATE_CLOSED afterwards so that it
 * is set even when iser_conn_close() bailed out early because h.closed
 * was already set; iser_conn_put() asserts a closing state when the
 * last reference goes away.
 */
void iser_conn_force_close(struct iscsi_conn *iscsi_conn)
{
        struct iser_conn *conn = ISER_CONN(iscsi_conn);

        DERROR("conn:%p\n", conn);

        /* run the real close first; it is a no-op if already closed */
        iser_conn_close(conn);
        conn->h.state = STATE_CLOSED;

        iser_conn_put(conn);
}


/*
 * Granularity (2 MiB) to which iser_conn_alloc_ff_resources() rounds up
 * the size of the full-feature pool taken from conn->private_mem.
 */
#define MEM_SIZE (2 * 1024 * 1024)
/*
 * Ready for full feature, allocate resources: the helpers below support
 * iser_conn_alloc_ff_resources().
 */

/*
 * For a connection in STATE_FULL, ask target_localize_confirm() whether
 * the target is still confirmed.  If that fails the connection is moved
 * to STATE_CLOSE and closed.
 *
 * Returns 0 when nothing had to be done or the confirmation succeeded,
 * otherwise the error from target_localize_confirm() (asserted to be
 * EREMCHG).
 */
static int __iser_check_vip__(struct iser_conn *conn)
{
        int ret;

        /* only full-feature connections are checked */
        if (conn->h.state != STATE_FULL)
                return 0;

        ret = target_localize_confirm(conn->h.target);
        if (unlikely(ret)) {
                GOTO(err_close, ret);
        }

        return 0;
err_close:
        DINFO("connection %s:%d will be closed\n",
              inet_ntoa(conn->h.peer.sin_addr), ntohs(conn->h.peer.sin_port));

        conn->h.state = STATE_CLOSE;
        YASSERT(ret == EREMCHG);

        iser_conn_close(conn);

        return ret;
}

/*
 * Body of the periodic connection check task started by
 * __iser_check_core().
 *
 * Skips the work when the connection is closing or closed (tested
 * before and after the optional session check), otherwise logs the peer
 * and, when VIPs are configured and the local address is inside the VIP
 * net, runs the VIP check.  On every path the in_check flag is cleared
 * and the reference taken by __iser_check_core() is dropped.
 */
static void __iser_check__(void *arg)
{
        struct iser_conn *conn = arg;

        if (conn->h.state == STATE_CLOSED || conn->h.state == STATE_CLOSE)
                goto done;

#if ENABLE_ISCSI_CACHE_REUSE
        iscsi_session_check(&conn->h);
#endif

        /* re-test the state after the (optional) session check */
        if (conn->h.state == STATE_CLOSED || conn->h.state == STATE_CLOSE)
                goto done;

#if 0
        iscsi_connection_check(&conn->h);

        if (conn->h.state == STATE_CLOSED || conn->h.state == STATE_CLOSE)
                goto done;
#endif

        DINFO("%s:%d connect to %s\n", inet_ntoa(conn->h.peer.sin_addr),
                        ntohs(conn->h.peer.sin_port), conn->h.session->target->name);

        if (sanconf.iscsi_vip.vip_count && iser_netvip_in_vipnet(&conn->h.self)) {
                __iser_check_vip__(conn);
        }

done:
        conn->h.in_check = 0;
        iser_conn_put(conn);
}

/*
 * Callback registered with core_check_register() under "iser_check".
 *
 * Logs the target's last confirm time and the current time.  When the
 * elapsed time is NOT a multiple of CONFIRM_INTERVAL and no check is in
 * flight, it marks the connection as being checked, takes a reference
 * and starts __iser_check__() as a new task.
 *
 * NOTE(review): the condition fires on a non-zero remainder, i.e. on
 * almost every call; if the intent was "once every CONFIRM_INTERVAL"
 * the test would need "== 0" -- confirm with the author.
 *
 * @arg:  the struct iser_conn to check.
 * @arg2: unused.
 */
static void __iser_check_core(void *arg, void *arg2)
{
        struct iser_conn *conn = arg;
        time_t now = gettime();

        (void)arg2;

        DINFO("iser target check last %ld, now %ld \n", conn->h.target->confirm, now);

        if (((now - conn->h.target->confirm) % CONFIRM_INTERVAL) == 0)
                return;

        if (conn->h.in_check != 0)
                return;

        conn->h.in_check = 1;
        iser_conn_get(conn);
        schedule_task_new("isser_check", __iser_check__, conn, -1);
}

int iser_conn_alloc_ff_resources(struct iser_conn *conn)
{
        /* ToDo: need to fix the ISCSI_PARAM_INITIATOR_RDSL bug in initiator */
        /* buf_size = ALIGN_TO_32(MAX(conn->rsize, conn->ssize)); */
        int ret = 0;
        unsigned long buf_size = /*ALIGN_TO_32(max(conn->rsize, conn->ssize));//*/ALIGN_TO_32(conn->rsize); /* includes headers */
        unsigned long num_tasks = conn->max_outst_pdu + 2; /* all rx, text tx, nop-in*/
        unsigned long alloc_sz;
        uint8_t *pdu_data_buf, *task_buf;
        struct iser_task *task;
        unsigned int i;

        DINFO("conn:%p max_outst:%u buf_sz:%lu (ssize:%u rsize:%u) task size %lu\n",
                conn, conn->max_outst_pdu, buf_size,
                conn->ssize, conn->rsize, sizeof(struct iser_task));

        if (conn->private_mem) {
                alloc_sz = num_tasks * (buf_size + sizeof(struct iser_task));
                alloc_sz = (alloc_sz % MEM_SIZE) ? ( alloc_sz / MEM_SIZE + 1) * MEM_SIZE : alloc_sz;

                {
                        int sz = 1;
                        while (sz < alloc_sz)
                                sz <<= 1;
                        alloc_sz = sz;
                }

                conn->pdu_data_pool = mempages_alloc(conn->private_mem, alloc_sz);
                conn->task_pool =  conn->pdu_data_pool + num_tasks * buf_size;

                memset(conn->pdu_data_pool, 0, alloc_sz);
                
                conn->mr = conn->dev->ff_resources_mr;
                conn->pdu_data_mr = conn->mr;
                DINFO("for bug test the conn mr is %p addr is %p\n", conn->mr, conn->private_mem);
        } else {
                alloc_sz = num_tasks * buf_size;
                ret = ymalloc((void **)&conn->pdu_data_pool, alloc_sz);
                if (ret) {
                        DERROR("conn:%p malloc pdu_data_buf sz:%lu failed\n",
                                        conn, alloc_sz);
                        GOTO(err_ret, ret);
                }


                ret = iser_dev_regmr(&conn->pdu_data_mr, conn->dev->pd,
                                conn->pdu_data_pool, alloc_sz);
                if (!conn->pdu_data_mr) {
                        DERROR("conn:%p ibv_reg_mr pdu_data_pool failed, %m\n",
                                        conn);
                        free(conn->pdu_data_pool);
                        conn->pdu_data_pool = NULL;
                        GOTO(err_ret, ret);
                }

                alloc_sz = num_tasks * sizeof(struct iser_task);
                ret = ymalloc((void **)&conn->task_pool, alloc_sz);
                if (ret) {
                        DERROR("conn:%p malloc task_pool sz:%lu failed\n",
                                        conn, alloc_sz);
                        iser_dev_deregmr(conn->pdu_data_mr);
                        conn->pdu_data_mr = NULL;
                        free(conn->pdu_data_pool);
                        conn->pdu_data_pool = NULL;
                        GOTO(err_ret, ret);
                }
                memset(conn->task_pool, 0, alloc_sz);

        }

        conn->ff_res_alloc = 1;
        pdu_data_buf = conn->pdu_data_pool;
        task_buf = conn->task_pool;
        for (i = 0; i < conn->max_outst_pdu; i++) {
                task = (void *) task_buf;

                iser_task_init(task, conn,
                               pdu_data_buf, buf_size,
                               conn->pdu_data_mr);

                task_buf += sizeof(*task);
                /* ToDo: need to fix the ISCSI_PARAM_INITIATOR_RDSL bug in initiator */
                /* pdu_data_buf += conn->rsize + conn->ssize; */
                pdu_data_buf += buf_size;

                ret = iser_dev_post_recv(conn, task, 1);
                if (ret) {
                        DERROR("conn:%p post_recv (%d/%d) failed\n",
                                conn, i, conn->max_outst_pdu);
                        iser_free_ff_resources(conn);
                        GOTO(err_ret, ret);
                }
        }

        /* initialize unsolicited tx task: nop-in */
        task = (void *) task_buf;
        iser_task_unsolicited_init(task, conn, pdu_data_buf,
                                   buf_size, conn->pdu_data_mr);
        conn->nop_in_task = task;
        task_buf += sizeof(*task);
        pdu_data_buf += buf_size;

        /* initialize tx task: text/login */
        task = (void *) task_buf;
        iser_task_unsolicited_init(task, conn, pdu_data_buf,
                                   buf_size, conn->pdu_data_mr);
        conn->text_tx_task = task;

	if (conn->h.target) {
 		int hash;
		hash = core_hash(&conn->h.target->fileid);
		if (conn->private_mem) {
			core_check_register(hash, "iser_check", conn, __iser_check_core);
                        conn->core_reg = 1;
                }
	}


        return 0;
err_ret:
        return ret;
}

